import pandas as pd
import json
def read_excel(file_path):
    df = pd.read_excel(file_path, engine='openpyxl')
    print(df.head())
    return df
def convert_to_query(df):
    json_data = []
    for _, row in df.iterrows():
        item = {
            "工单信息": {
                "业务部门": {
                    "所属系统": row["所属系统"],
                    "工单描述": row["工单描述"],
                    "工单详情": row["工单详情"]
                },
                "技术部门": {
                    "事件原因": row["事件原因"],
                    "解决方案": row["解决方案"],
                    "工单分类": row["工单分类"],
                    "问题分类": row["问题分类"]
                }
            }
        }
        json_data.append(item)
    return json_data

def convert_to_response(df):
    json_data = []
    for _, row in df.iterrows():
        item = {
            "审核部门": {
                "是否合格": row["是否合格"],
                "不合格原因": row["不合格原因"]
            }
        }
        json_data.append(item)
    return json_data

if __name__ == "__main__":
    # step1: 读取Excel文件
    file_path = r'Data\一体化运维平台2025年2月6日工单检查情况.xlsx'  # 替换为你的Excel文件路径
    df = read_excel(file_path)

    # step2: 将excel数据转换为JSON格式
    query = convert_to_query(df)  
    response = convert_to_response(df)
    print(json.dumps(query[2], indent=4, ensure_ascii=False))
    print(json.dumps(response[2], indent=4, ensure_ascii=False))

    # step3: 将前3条JSON数据写入jsonl文件
    with open('dataset.jsonl', 'w', encoding='utf-8') as f:
        for q, r in zip(query, response):
        #for q, r in zip(query[:3], response[:3]):
            q_str = json.dumps(q, ensure_ascii=False)
            query_str = "帮我判断以下工单是否合格，并以json格式输出："+q_str
            r_str = json.dumps(r, ensure_ascii=False)
            combined = {
                "system": "",
                "query": query_str,
                "response": r_str
            }
            json.dump(combined, f, ensure_ascii=False)
            f.write('\n')
